* MERGING TWO ORIGINAL DATA FILES WITH UNIQUE ID
* Work from the folder that holds the raw ONS Omnibus files
cd "Z:\IAS STATA files\ONS Omnibus 2002\stata6"
set more off

* Prepare both raw files in the same way: add serial2 (a string copy of serial, used as
* the merge key) and hhno (the number of records sharing that serial2, so duplicated
* serials can be spotted), then save the file sorted on serial2.
* f0203303 is saved as part2 and f0203192 as part1; part1 is handled last so that it is
* the dataset left in memory when the merge runs.
local rawfiles f0203303 f0203192
local sortedfiles part2 part1
forvalues i = 1/2 {
	local rawfile : word `i' of `rawfiles'
	local sortedfile : word `i' of `sortedfiles'

	use `rawfile'.dta, clear
	capture drop serial2
	tostring serial, generate(serial2) format(%16.0f)
	capture drop hhno
	bysort serial2: generate hhno = _N
	sort serial2
	save `sortedfile', replace
}

* Old-syntax match merge: part2 is merged onto the data in memory (part1) on serial2
merge serial2 using part2
save working, replace

* The intermediate files are no longer needed once working.dta exists
erase part1.dta
erase part2.dta

* Remove the unwanted drink-driving variables (sex, age and the weights are kept)
drop regiona-n11to15 respwith-tengrp m303_2a-m303_3d m303_5-m303_10 m303prac-m303_pr9
* Remove the unwanted drinking variables on knowledge of units
drop m192_10-spec53 m192_35-m192_575

* WEIGHTS
* fw is a frequency weight built from the pweight wta: scaled by 1000, then truncated to an integer
generate fw = int(1000*wta)
/* This creates the same results as using the tab option under SVY, without converting the weights - BUT
	it creates completely inaccurate sample sizes, and is inappropriate for regressions */




****************************************************************************************************************
*** DATA CLEANING
****************************************************************************************************************

/* QUANTITIES NOTES: 
	1 unit (10ml alcohol) = half pint of normal beer/cider, glass of wine, small glass of fortified wine, shot of spirits
	1.5 units =  large cans of normal beer/cider, half pint of strong beer
	See also http://www.drinkaware.co.uk/howmany_male.php for a good unit calculator for popular brands
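	CLEANING (for '97' values): followed ONS in using an exact conversion of beer bottles to pints (1 pint = 0.5863 
	litres; this means that all beer litres must be multiplied by 2/0.5863 = 3.411 to reach units, as 1 pint = 2 units).
	NOTE(review): an imperial pint is 0.568 litres, so 0.5863 may be a digit transposition (that would give
	2/0.568 = 3.52) - confirm against the ONS documentation before changing the constant used in the replacements.
	All replacements are based on visual inspection; this is just a log of changes */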
* NOTE: how is a glass of wine classed as 1 unit!!!

* Strong beer amounts: show the cases coded 97 or 98 before cleaning
list serial2 specstb stbrewam stbrew if inlist(stbrewam, 97, 98), nolabel

* stbrewam2 copies stbrewam, except that 99 ('don't know') is stored as the missing code .d;
* none have 98 (refusal)
capture drop stbrewam2
generate stbrewam2 = cond(stbrewam == 99, .d, stbrewam)
label variable stbrewam2 "stbrewam with unusual values corrected"

* Case-by-case corrections, each written as:
*	constant conversion * no. containers * l per container
replace stbrewam2 = (2/0.5863)*1*0.55 if serial2 == "2002020401127"	// Assumed 'cans' means 1 can
replace stbrewam2 = (2/0.5863)*3*0.33 if serial2 == "2002030702501"	// Assumed bottles are 330ml
replace stbrewam2 = (2/0.5863)*2*0.50 if serial2 == "2002030702520"
replace stbrewam2 = (2/0.5863)*2*0.60 if serial2 == "2002030702525"	// Assumed large cans = 600ml
replace stbrewam2 = (2/0.5863)*1*0.33 if serial2 == "2002030802605"	// Assumed bottle is 330ml
replace stbrewam2 = (2/0.5863)*5*0.33 if serial2 == "2002021004116"
replace stbrewam2 = (2/0.5863)*1*0.44 if serial2 == "2002021004117"
replace stbrewam2 = (2/0.5863)*8*0.33 if serial2 == "2002021004220"
replace stbrewam2 = (2/0.5863)*4*0.33 if serial2 == "2002031708506"

* Same listing again, now with the corrected variable alongside, to check the result
list serial2 specstb stbrewam2 stbrewam stbrew if inlist(stbrewam, 97, 98), nolabel

* Normal beer amounts: show the cases coded 97 or 98 before cleaning
list serial2 specnmb nmbrewam nmbrew if nmbrewam==97 | nmbrewam==98, nol
capture drop nmbrewam2
gen nmbrewam2=nmbrewam if nmbrewam~=99					// 99 indicates 'don't know', hence missing
* The test must be on the ORIGINAL variable: nmbrewam2 can never equal 99 because the line
* above excludes it, so testing nmbrewam2 left 'don't know' as plain . instead of .d
* (and the later .d check on nmbrewam2 could never fire).
replace nmbrewam2=.d if nmbrewam==99
label var nmbrewam2 "nmbrewam with unusual values corrected"
* Case-by-case corrections, each written as: constant conversion * no. containers * l per container
replace nmbrewam2=(2/0.5863)*1*0.50 if serial2=="2002021405916"
replace nmbrewam2=(2/0.5863)*1*0.275 if serial2=="2002021708019"
replace nmbrewam2=(2/0.5863)*2*0.275 if serial2=="2002021708404"
replace nmbrewam2=(2/0.5863)*1*3 if serial2=="2002022209704"
replace nmbrewam2=(2/0.5863)*2*0.50 if serial2=="2002030702520"
replace nmbrewam2=(2/0.5863)*1*0.75 if serial2=="2002030802605"
replace nmbrewam2=(2/0.5863)*1*0.50 if serial2=="2002031004124"	// assumed 'can' is 500ml
replace nmbrewam2=(2/0.5863)*2*0.25 if serial2=="2002031708310"
replace nmbrewam2=(2/0.5863)*1*0.33 if serial2=="2002031708504"
replace nmbrewam2=(2/0.5863)*8*0.33 if serial2=="2002031708506"
replace nmbrewam2=(2/0.5863)*1*0.33 if serial2=="2002031708521"
replace nmbrewam2=(2/0.5863)*3*0.50 if serial2=="2002031708527"
* Same listing again, now with the corrected variable alongside, to check the result
list serial2 specnmb nmbrewam2 nmbrewam nmbrew if nmbrewam==97 | nmbrewam==98, nol

* Spirits: coded in 25ml singles, hence responses in litres are (1/0.025) = 40 shots per litre (30 shots per 75cl, 28 per 70cl), abv 40%.
* NOTE ALSO that one case reports 70 units (nearly 2L!), but very infrequent so not that important
* Spirit amounts: show the cases coded 97 or 98 before cleaning
list serial2 xspirtam spiritam spirit if inlist(spiritam, 97, 98), nolabel

* spiritam2 copies spiritam, except that 99 ('don't know') is stored as the missing code .d
capture drop spiritam2
generate spiritam2 = cond(spiritam == 99, .d, spiritam)
label variable spiritam2 "spiritam with unusual values corrected"

* Case-by-case corrections: shots per litre * litres reported
replace spiritam2 = (1/0.025)*0.50 if serial2 == "2002030702509"
replace spiritam2 = (1/0.025)*0.35 if serial2 == "2002031607411"	// "small bottle" assumed to be 35cl

* Same listing again, now with the corrected variable alongside, to check the result
list serial2 xspirtam spiritam2 spiritam spirit if inlist(spiritam, 97, 98), nolabel

/* Fortified wine: coded in small glasses (=1 unit), assumed that this is 18% abv or 13.5 units per 75cl bottle
	(cf. drinkaware), which implies a size of 56ml per unit */
* Fortified wine amounts: show the cases coded 97 or 98 before cleaning
list serial2 xsheryam sherryam sherry if inlist(sherryam, 97, 98), nolabel

* sherryam2 copies sherryam, except that 99 ('don't know') is stored as the missing code .d
capture drop sherryam2
generate sherryam2 = cond(sherryam == 99, .d, sherryam)
label variable sherryam2 "sherryam with unusual values corrected"

* Case-by-case corrections; (13.5/0.75) is the assumed number of units per litre
replace sherryam2 = 0.5863*(13.5/0.75) if serial2 == "2002020401325"	// "1 pint", bizarrely...
replace sherryam2 = (13.5/0.75)*0.75/2 if serial2 == "2002020702424"
replace sherryam2 = 0.5 if serial2 == "2002020702430"
replace sherryam2 = (13.5/0.75)*0.75/3 if serial2 == "2002021708413"

* Same listing again, now with the corrected variable alongside, to check the result
list serial2 xsheryam sherryam2 sherryam sherry if inlist(sherryam, 97, 98), nolabel

* Wine: coded in number of 125ml glasses as one unit, hence a 75cl bottle is 6 units
* Wine amounts: show the cases coded 97 or 98 before cleaning
list serial2 xwineam wineam wine if inlist(wineam, 97, 98), nolabel

* wineam2 copies wineam, except that 99 ('don't know') is stored as the missing code .d
capture drop wineam2
generate wineam2 = cond(wineam == 99, .d, wineam)
label variable wineam2 "wineam with unusual values corrected"

* Case-by-case corrections, entered directly as numbers of glasses
replace wineam2 = 6 if serial2 == "2002021004229"
replace wineam2 = 4.5 if serial2 == "2002031305620"	// "a bottle or half a bottle"
replace wineam2 = 3 if serial2 == "2002021607008"
replace wineam2 = 1.5 if serial2 == "2002021708416"

* Same listing again, now with the corrected variable alongside, to check the result
list serial2 xwineam wineam2 wineam wine if inlist(wineam, 97, 98), nolabel

* Alcopops: coded in numbers of bottles, with assumed ABV of 5% (Drinkaware) = 1.375 units
* Alcopop amounts: show any cases coded 97 or 98
list serial2 xallemam allemam allem if inlist(allemam, 97, 98), nolabel

* allemam2 copies allemam, except that 99 ('don't know') is stored as the missing code .d
capture drop allemam2
generate allemam2 = cond(allemam == 99, .d, allemam)
label variable allemam2 "allemam with unusual values corrected"
* NOTE: there were no unusual values to correct for this beverage


* Other drinks: misc details from drinkaware, all are exacts no. units unless an existing cat., in which case uses scalars
* Other drinks: show the cases flagged by ifother==1 for inspection
list serial2 otherdr otheram otherd if ifother==1, nol

* otheram2 starts entirely missing and is filled in case by case below, in units
capture drop otheram2
gen float otheram2=.				
label var otheram2 "otheram made fit for analysis"

* Units per serving for each kind of drink.
* NOTE(review): baileys and archers are defined but not used by any replacement below.
scalar allemabv=1.375			// Assumes a 275ml bottle = 1.375 units
scalar spabv=1				// Assumes a 25ml shot = 1 unit
scalar wnabv=1				// Assumes 125ml wine = 1 unit
scalar nmbabv=1				// Assumes half a pint of normal beer = 1 unit
scalar liqueur=1				// All liqueurs (Malibu, Baileys, Amaretto, Pimms, Port) assumed to be 50ml and 20% abv
scalar baileys=0.9			// Baileys assumed to be 50ml, 17%
scalar archers=1.2			// Archers assumed to be 50ml, 23%	
scalar cocktail=2				// Cocktails and punch assumed to be 2 shots

* Case-by-case values: units per serving * number of servings
replace otheram2=spabv*3 if serial2=="2002020100214"
replace otheram2=spabv*6 if serial2=="2002020401325"
replace otheram2=liqueur*1 if serial2=="2002020501611"
replace otheram2=liqueur*1 if serial2=="2002020601917"
replace otheram2=liqueur*1 if serial2=="2002020601924"
replace otheram2=liqueur*1 if serial2=="2002020602226"
replace otheram2=liqueur*1 if serial2=="2002020803205"
replace otheram2=cocktail*24 if serial2=="2002020903718"
replace otheram2=0 if serial2=="2002021004206"				// flower remedy assumed to be negligible

replace otheram2=liqueur*1 if serial2=="2002021104430"
replace otheram2=spabv*1 if serial2=="2002021305314"
replace otheram2=wnabv/2*2 if serial2=="2002021305404"	// low alcohol wine (4% abv)
replace otheram2=spabv*4 if serial2=="2002021405804"
replace otheram2=liqueur*3 if serial2=="2002021406425"
replace otheram2=nmbabv*1 if serial2=="2002021506516"		// homebrew assumed to be normal strength
replace otheram2=liqueur*1 if serial2=="2002021506816"
replace otheram2=liqueur*1 if serial2=="2002021506818"
replace otheram2=liqueur*1 if serial2=="2002021607006"
replace otheram2=allemabv*2 if serial2=="2002021607030"

replace otheram2=cocktail*2 if serial2=="2002021607123"
replace otheram2=spabv*1 if serial2=="2002021707925"		// small glass of absinthe assumed to be a normal shot
replace otheram2=nmbabv*2/0.5863*0.44 if serial2=="2002021708017"
replace otheram2=allemabv*4 if serial2=="2002022309916"
replace otheram2=allemabv*1 if serial2=="2002022309923"
replace otheram2=liqueur*3 if serial2=="2002030200408"
replace otheram2=spabv*8 if serial2=="2002030501515"
replace otheram2=spabv*2 if serial2=="2002030501602"

replace otheram2=spabv*3 if serial2=="2002030601820"
replace otheram2=spabv*(1/0.025)*0.75/2 if serial2=="2002030802916"
replace otheram2=wnabv*2 if serial2=="2002030903409"
replace otheram2=cocktail*2 if serial2=="2002030903522"
replace otheram2=spabv*3 if serial2=="2002031003819"
replace otheram2=nmbabv*1 if serial2=="2002031104410"
replace otheram2=liqueur*1 if serial2=="2002031305605"
replace otheram2=liqueur*2 if serial2=="2002031406029"
replace otheram2=wnabv*1 if serial2=="2002031406030"

replace otheram2=cocktail*6 if serial2=="2002031506410"
replace otheram2=cocktail*2 if serial2=="2002031506701"
replace otheram2=wnabv/2*1 if serial2=="2002031506819"		// Bucks Fizz treated as half-strength wine
replace otheram2=spabv*2 if serial2=="2002031607111"
replace otheram2=spabv*5 if serial2=="2002031607712"
replace otheram2=wnabv*2 if serial2=="2002032309916"

* all the unestimable ones: explicitly left missing, then listed once for inspection
* (the listing was previously issued twice with identical arguments)
replace otheram2=. if inlist(serial2, "2002020200402", "2002030300819", "2002030501720", ///
	"2002030802617", "2002031506818", "2002031707810", "2002032309910")
list serial2 otherdr otheram otheram2 otherd ///
	if inlist(serial2, "2002020200402", "2002030300819", "2002030501720", ///
	"2002030802617", "2002031506818", "2002031707810", "2002032309910"), nol
* This shows that all of the unestimable ones make very little difference as they are so rare (freq=6|7), hence still included




****************************************************************************************************************
**** Have to construct a yearly drinking variable for the amount drunk, as no DVs included in the dataset...
****************************************************************************************************************

* FREQUENCY RECODING INTO WEEKLY WEIGHTS, e.g. every day =7, once or twice a week = 1.5.
* 98 becomes .r, 99 becomes .d, and any other code becomes plain missing.
capture drop _stbrew _nmbrew _spirit _sherry _wine _allem _otherd
recode stbrew nmbrew spirit sherry wine allem otherd (1=7)(2=5.5)(3=3.5)(4=1.5)(5=0.375)(6=0.115)(7=0.029)(8=0) /*
	*/ (98=.r) (99=.d) (else=.), gen(_stbrew _nmbrew _spirit _sherry _wine _allem _otherd) 

* CREATING BEVERAGE-SPECIFIC TOTAL - relies on cleaning to be finished before (e.g. no poorly coded missing vals)
* Each total is weekly frequency * cleaned amount, converted to units where the amount is not already in units.
capture drop unit*
gen unitstb = (_stbrew*stbrewam2*1.5) if _stbrew<.	// Strong beer: 1.5 units per half pint
gen unitnmb = (_nmbrew*nmbrewam2) if _nmbrew<.		// Normal beer
gen unitsp  = (_spirit*spiritam2) if _spirit<.		// Spirits
gen unitsh  = (_sherry*sherryam2) if _sherry<.		// Fortified wines
gen unitwn  = (_wine*wineam2) if _wine<.			// Wines
* Alcopops are recorded in bottles, and a bottle is assumed to be 1.375 units (275ml at 5% abv),
* so the bottle count has to be scaled to units like the other beverages.
gen unital  = (_allem*allemam2*1.375) if _allem<.	// Alcopops ('alcoholic lemonade')
gen unitoth = (_otherd*otheram2) if _otherd<.		// Other drinks (mainly liqueurs)

* These create 0 values for all non-refusals (inc. abstainers).
* NOTE(review): every line tests stbrew==98 rather than the beverage's own frequency variable -
* presumably a refusal is recorded on stbrew for the whole section; confirm.
replace unitstb=0 if unitstb==. & m192_1~=8 & stbrew~=98
replace unitnmb=0 if unitnmb==. & m192_1~=8 & stbrew~=98
replace unitsp=0 if unitsp==. & m192_1~=8 & stbrew~=98
replace unitsh=0 if unitsh==. & m192_1~=8 & stbrew~=98
replace unitwn=0 if unitwn==. & m192_1~=8 & stbrew~=98
replace unital=0 if unital==. & m192_1~=8 & stbrew~=98
replace unitoth=0 if unitoth==. & m192_1~=8 & stbrew~=98

* CREATING AN OVERALL TOTAL
* unityr2 is the plain sum (missing if any component is missing); fortified wine (unitsh)
* is now included, as it is in unityr below.
gen unityr2=unitstb+unitnmb+unitsp+unitsh+unitwn+unital+unitoth

* unityr adds up whichever components are not plain missing
gen unityr=0							
label var unityr "Total weekly units drunk, past 12 months"
replace unityr=unityr+unitstb if unitstb~=.
replace unityr=unityr+unitnmb if unitnmb~=.
replace unityr=unityr+unitsp  if unitsp~=.
replace unityr=unityr+unitsh  if unitsh~=.
replace unityr=unityr+unitwn  if unitwn~=.
replace unityr=unityr+unital  if unital~=.
replace unityr=unityr+unitoth  if unitoth~=.

* Sets missing values for people who couldn't answer an amount question (.d) or refused to respond (.r)
replace unityr=.d if stbrewam2==.d|nmbrewam2==.d|spiritam2==.d|sherryam2==.d|wineam2==.d|allemam2==.d
replace unityr=.r if m192_1==8 | stbrew==98
* Drops the working variables by position, from _stbrew through unitoth; unityr2 and unityr come later and are kept
drop _stbrew-unitoth




****************************************************************************************************************
**** Constructing other useful vars
****************************************************************************************************************

* DRINKING VARS
* abst: dummy for reported 12mth abstinence - 1 when m192_2 is 2 or droften is 8, otherwise 0,
* then overwritten with .r for refusals (m192_1 is 8)
capture drop abst*
generate abst = (m192_2 == 2) | (droften == 8)
replace abst = .r if m192_1 == 8
label variable abst "Reported 12mth abstainers"

* freq12m: droften with 98 stored as .r and 99 as plain missing, and with abstainers
* set to 8 ("not at all") rather than left missing
capture drop freq12m
generate freq12m = droften
mvdecode freq12m, mv(98=.r \ 99=.)
replace freq12m = 8 if abst == 1
label values freq12m droften
label variable freq12m "Reported 12mth drinking freq"


* DRINK-DRIVING VARS
* Recode the four frequency items into usable numeric frequencies for drinking, driving per se,
* drink-driving and driving over the legal limit. 98 becomes .r, 99 becomes .d, existing
* .r/.d are carried through, and any other value becomes plain missing.
capture drop alcfreq drvfreq ddfreq bacfreq
recode freq12m m303_1 m303_11 m303_13 ///
	(1=7) (2=5.5) (3=3.5) (4=1.5) (5=0.375) (6=0.115) (7=0.029) (8=0) (9=0) ///
	(98=.r) (99=.d) (.r=.r) (.d=.d) (else=.), ///
	generate(alcfreq drvfreq ddfreq bacfreq)
label variable alcfreq "Drinking frequency, 12mth"
label variable drvfreq "Driving freq, 12mth"
label variable ddfreq "Drink-driving freq, 12mth"
label variable bacfreq "Driving over the legal limit freq, 12mth"


* Fill in 0 for everyone the questionnaire routing assumed to be 0:
*	- not driving at all in the past 12 months: not asked about drink-driving, so ddfreq goes from . to 0
*	- not drinking at all in the past 12 months: not asked about drink-driving, so ddfreq goes from . to 0
*	- not drink-driving at all in the past 12 months: not asked about driving over the legal limit
replace ddfreq = 0 if drvfreq == 0 | (alcfreq == 0 & m303_1 < .)
replace bacfreq = 0 if ddfreq == 0

* m303scin equal to 3 is treated as a refusal on both measures
replace ddfreq = .r if m303scin == 3
replace bacfreq = .r if m303scin == 3 | ddfreq == .r



* VERY BASIC IMPUTATION - scrapped as doesn't seem to make a difference at all
* For each measure: keep an untouched copy (_o / o suffix) and build an imputed copy (_i / i suffix)
* in which refusals (.r) are replaced by the survey-weighted mean of the observed values.
* Assumes the survey design has already been declared with svyset elsewhere.

* Drink-driving frequency
capture drop ddfreq_o ddfreq_i
gen ddfreq_o = ddfreq
label var ddfreq_o "Original ddfreq variable - no imputation"
gen ddfreq_i = ddfreq
label var ddfreq_i "Imputed ddfreq variable - all those who refused to answer assumed to be drink-drivers"
svy: mean ddfreq if ddfreq<.
matrix define imp1 = e(b)
replace ddfreq_i = imp1[1,1] if ddfreq_o==.r

* Driving over the legal limit.
* This section used to take the mean of ddfreq and write it into ddfreq_i (copy-paste slip),
* so bacfreq_i was never imputed and ddfreq_i was overwritten; it now works on bacfreq throughout.
capture drop bacfreq_o bacfreq_i
gen bacfreq_o = bacfreq
label var bacfreq_o "Original bacfreq variable - no imputation"
gen bacfreq_i = bacfreq
label var bacfreq_i "Imputed bacfreq variable - all those who refused to answer assumed to be drink-drivers"
svy: mean bacfreq if bacfreq<.
matrix define imp2 = e(b)
replace bacfreq_i = imp2[1,1] if bacfreq_o==.r

* m303_12
* NOTE(review): refusals are identified here through ddfreq_o==.r rather than through m303_12
* itself - presumably because m303_12 is still in its raw coding; confirm this is intended.
capture drop m303_12o m303_12i
gen m303_12o = m303_12
label var m303_12o "Original m303_12 variable - no imputation"
gen m303_12i = m303_12
label var m303_12i "Imputed m303_12 variable - all those who refused to answer assumed to be drink-drivers"
svy: mean m303_12 if m303_12<.
matrix define imp3 = e(b)
replace m303_12i = imp3[1,1] if ddfreq_o==.r

